Load data
## scRNA-Seq data
sce.sc <- readRDS(opt$scesc)
## single-cell cell types
types <- sce.sc[["types"]]
## Load tcr data
immTab <- sce.sc[["immTab"]]
sce.sc <- sce.sc[["SCE"]]
## sce per pat
sce.pat <- readRDS(opt$scescpat)
## Load pathway annotation
path.info <- read_excel(opt$pathinfo)
Overview scRNA-seq
## Randomly sample
set.seed(100)
sce.sc <- sce.sc[, sample(1:ncol(sce.sc), ncol(sce.sc))]
## Re-run UMAP
set.seed(100)
sce.sc <- runUMAP(sce.sc, n_neighbors = 15, min_dist = 0.75, name = "UMAP", BPPARAM = mcparam)
## Make plots
plotTab <- ggcells(sce.sc)[[1]]
## Cell type
p <- plotTab %>%
left_join(types, by = "uniqueBarcode") %>%
mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
ggplot(aes(x = UMAP.1, y = UMAP.2, fill = celltype)) +
geom_point(shape = 21, size = 2.5, stroke = 0.25) +
lgd + theme_void() +
scale_fill_manual(values = c("MCL" = "#F44336", "T-PLL" = "#64B5F6",
"healthy T-cell" = "#0D47A1", "NK-cell" = "#AB47BC",
"healthy B-cell" = "#FFA000", "HCL" = "#FFD45F",
"T-LGL" = "#80DEEA",
"monocyte" = "#66BB6A")) +
theme(legend.position = "none")
p

ggsave(plot = p, filename = paste0(opt$plot, "scrna_umap_celltypes.png"),
height = 15, width = 15, units = "cm")
## Treatment
p <- plotTab %>%
left_join(types, by = "uniqueBarcode") %>%
mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
ggplot(aes(x = UMAP.1, y = UMAP.2, fill = Treatment)) +
geom_point(shape = 21, size = 2.5, stroke = 0.25) +
lgd + theme_void() +
scale_fill_manual(values = c("DMSO" = "grey70", "Birinapant" = "#AD1457", "Nutlin-3a" = "#FFA000",
"Ibrutinib" = "#283593", "Everolimus" = "#43A047",
"Selumetinib" = "#F8BBD0")) +
theme(legend.position = "none")
p

ggsave(plot = p, filename = paste0(opt$plot, "scrna_umap_treatment.png"),
height = 15, width = 15, units = "cm")
## PatientID
p <- plotTab %>%
left_join(types, by = "uniqueBarcode") %>%
mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
ggplot(aes(x = UMAP.1, y = UMAP.2, fill = patientID)) +
geom_point(shape = 21, size = 2.5, stroke = 0.25) +
lgd + theme_void() +
scale_fill_npg() +
theme(legend.position = "none")
p

ggsave(plot = p, filename = paste0(opt$plot, "scrna_umap_patientID.png"),
height = 15, width = 15, units = "cm")
Show donut plots
plotTab.vdj <- plotTab %>%
left_join(types, by = "uniqueBarcode") %>%
filter(celltype %in% c("malignant T-cell", "healthy T-cell")) %>%
mutate(celltype = ifelse(celltype == "malignant T-cell", patientID, celltype),
celltype = ifelse(celltype == "healthy T-cell", "Healthy T-Cells", celltype))
plotTab.vdj[plotTab.vdj$celltype == "H371" , "celltype"] <- "T-PLL1"
plotTab.vdj[plotTab.vdj$celltype == "H431", "celltype"] <- "T-PLL2"
plotTab.vdj[plotTab.vdj$celltype == "H279", "celltype"] <- "T-PLL3"
plotTab.vdj[plotTab.vdj$celltype == "H501", "celltype"] <- "T-LGL1"
## Merge VDJ data with plotting info
plotTab.vdj <- plotTab.vdj %>% left_join(select(immTab, cell_id, clone_id, c_call, #mu_freq, sequence_alignment
),
by = c("uniqueBarcode" = "cell_id"))
## Calculate the number of clones per cell type
countTab <- plotTab.vdj %>%
group_by(celltype) %>%
select(celltype, clone_id) %>% unique() %>%
dplyr::count(celltype) %>% mutate(n_clones = n) %>%
select(-n)
countTab
## # A tibble: 6 × 2
## # Groups: celltype [6]
## celltype n_clones
## <chr> <int>
## 1 H525 1
## 2 Healthy T-Cells 1695
## 3 T-LGL1 1
## 4 T-PLL1 2
## 5 T-PLL2 2
## 6 T-PLL3 2
## Calculate the size of each clone
countSize <- plotTab.vdj %>%
filter(!is.na(clone_id)) %>%
group_by(celltype) %>%
select(celltype, clone_id) %>%
dplyr::count(celltype, clone_id) %>% mutate(clone_size = n) %>%
select(-n)
countSize
## # A tibble: 1,697 × 3
## # Groups: celltype [4]
## celltype clone_id clone_size
## <chr> <chr> <int>
## 1 Healthy T-Cells TCR_1001_250 1
## 2 Healthy T-Cells TCR_1005_512 1
## 3 Healthy T-Cells TCR_1006_1035 1
## 4 Healthy T-Cells TCR_1008 1
## 5 Healthy T-Cells TCR_1008_1376 1
## 6 Healthy T-Cells TCR_1009_1739 1
## 7 Healthy T-Cells TCR_1010_1005 1
## 8 Healthy T-Cells TCR_1011_1629 2
## 9 Healthy T-Cells TCR_1012_1017 1
## 10 Healthy T-Cells TCR_1012_1413 1
## # ℹ 1,687 more rows
###################
# Donut charts of clonal composition
###################
## Calculate the size of each clone - don't subset to only cells with clone_id
# otherwise NA T-cells will not be in the plot later.
cloneCount <- plotTab.vdj %>%
group_by(celltype) %>%
dplyr::count(clone_id) %>% ungroup()
cloneCount
## # A tibble: 1,703 × 3
## celltype clone_id n
## <chr> <chr> <int>
## 1 H525 <NA> 1
## 2 Healthy T-Cells TCR_1001_250 1
## 3 Healthy T-Cells TCR_1005_512 1
## 4 Healthy T-Cells TCR_1006_1035 1
## 5 Healthy T-Cells TCR_1008 1
## 6 Healthy T-Cells TCR_1008_1376 1
## 7 Healthy T-Cells TCR_1009_1739 1
## 8 Healthy T-Cells TCR_1010_1005 1
## 9 Healthy T-Cells TCR_1011_1629 2
## 10 Healthy T-Cells TCR_1012_1017 1
## # ℹ 1,693 more rows
## This script was written for a B-cell analysis. Therefore, some labels
# might refer to B-cells, but we're looking at T-cells at the moment.
cloneNum <- lapply(unique(plotTab.vdj$celltype), function(x) {
subTab <- filter(plotTab.vdj, celltype == x) # subset to each patient
countTab <- dplyr::count(subTab, clone_id)
nclones <- countTab %>% nrow() # get the number of clones
ncells_bcr <- nrow(subTab) # calculate the number of B or T cells per patient - important to subset plotTab to only B or T-cell clusters before doing this
res <- data.frame(patID = x, nclones = nclones, ncells_bcr = ncells_bcr)
res$type <- opt$subType
res
}) %>% bind_rows()
cloneNum
## patID nclones ncells_bcr
## 1 T-PLL2 2 9642
## 2 T-LGL1 1 5340
## 3 T-PLL1 2 8169
## 4 T-PLL3 2 6778
## 5 Healthy T-Cells 1695 3032
## 6 H525 1 1
df <- left_join(cloneCount, cloneNum, by = c("celltype" = "patID")) %>%
group_by(celltype) %>% mutate(clone_ratio = n / ncells_bcr) %>% # divide the size of the clone
select(celltype, clone_id, clone_ratio, nclones) %>% unique() %>%
mutate(ymax = cumsum(clone_ratio), ymin = c(0, head(ymax, n= -1)))
df
## # A tibble: 1,703 × 6
## # Groups: celltype [6]
## celltype clone_id clone_ratio nclones ymax ymin
## <chr> <chr> <dbl> <int> <dbl> <dbl>
## 1 H525 <NA> 1 1 1 0
## 2 Healthy T-Cells TCR_1001_250 0.000330 1695 0.000330 0
## 3 Healthy T-Cells TCR_1005_512 0.000330 1695 0.000660 0.000330
## 4 Healthy T-Cells TCR_1006_1035 0.000330 1695 0.000989 0.000660
## 5 Healthy T-Cells TCR_1008 0.000330 1695 0.00132 0.000989
## 6 Healthy T-Cells TCR_1008_1376 0.000330 1695 0.00165 0.00132
## 7 Healthy T-Cells TCR_1009_1739 0.000330 1695 0.00198 0.00165
## 8 Healthy T-Cells TCR_1010_1005 0.000330 1695 0.00231 0.00198
## 9 Healthy T-Cells TCR_1011_1629 0.000660 1695 0.00297 0.00231
## 10 Healthy T-Cells TCR_1012_1017 0.000330 1695 0.00330 0.00297
## # ℹ 1,693 more rows
#### Overview over all patients/celltypes
pClone.Pat <- df %>%
filter(celltype %in% c("T-PLL1", "T-PLL2", "T-PLL3", "Healthy T-Cells")) %>%
ggplot(aes(ymax = ymax, ymin = ymin, xmax=4, xmin=3, fill = clone_id)) +
geom_rect(colour = "black", size = 0.25) +
coord_polar(theta="y") +
xlim(c(2, 4)) +
# ggtitle(paste0("Clonal Composition per Patient - ", opt$subType)) +
facet_wrap(. ~ celltype, nrow = 1) +
theme_void() +
theme(plot.title = element_text(hjust = 0.5),
strip.background = element_blank(),
strip.text.x = element_text(size = 12.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
legend.position = "none")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
pClone.Pat

#### Output individual patients
subPat <- c("T-PLL1", "T-PLL2", "T-PLL3", "Healthy T-Cells")
lapply(subPat, function(subPat) {
pClone <- df %>% filter(celltype == subPat) %>%
ggplot(aes(ymax = ymax, ymin = ymin, xmax=4, xmin=3, fill = clone_id)) +
geom_rect(colour = "black", size = 0.25) +
coord_polar(theta="y") +
xlim(c(2, 4)) +
ggtitle(paste0(subPat)) +
theme_void() +
theme(plot.title = element_text(hjust = 0.5, size = 75),
legend.position = "none")
pClone
ggsave(plot = pClone, filename = paste0(opt$plot, "tcr_comp_", subPat, ".png"),
height = 20, width = 20, units = "cm")
})
## [[1]]
## [1] "plots/Fig4/tcr_comp_T-PLL1.png"
##
## [[2]]
## [1] "plots/Fig4/tcr_comp_T-PLL2.png"
##
## [[3]]
## [1] "plots/Fig4/tcr_comp_T-PLL3.png"
##
## [[4]]
## [1] "plots/Fig4/tcr_comp_Healthy T-Cells.png"
Perform differential gene expression testing - scRNA-Seq
opt$subsample <- TRUE
compDrug <- c("Nutlin-3a", "Birinapant", "Ibrutinib", "Everolimus", "Selumetinib")
sce_sub <- sce.sc
plotTab <- ggcells(sce_sub)[[1]]
colData(sce_sub) <- colData(sce_sub) %>%
data.frame() %>%
left_join(types, by = "uniqueBarcode") %>%
mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("healthy T-cell"), "Healthy T-Cells", celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("NK-cell"), "NK-Cells", celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("monocyte"), "Monocytes", celltype)) %>%
mutate(celltype = ifelse(patientID%in% c("H501") & celltype %in% c("MCL", "HCL", "T-PLL"), "dead cell", celltype)) %>%
mutate(celltype = ifelse(patientID%in% c("H431", "H371", "H279") & celltype %in% c("MCL", "HCL", "T-LGL"), "dead cell", celltype)) %>%
mutate(celltype = ifelse(patientID%in% c("H526", "H525") & celltype %in% c("MCL", "T-PLL", "T-LGL"), "dead cell", celltype)) %>%
mutate(celltype = ifelse(patientID%in% c("H496", "H432", "P1029") & celltype %in% c("HCL", "T-PLL", "T-LGL"), "dead cell", celltype)) %>%
DataFrame()
if(opt$subsample == TRUE) {
cellSub <- c("HCL", "MCL", "T-PLL", "Healthy T-Cells")
} else {
cellSub <- c("HCL", "MCL", "T-PLL")
}
testTreat <- c("Birinapant", "Nutlin-3a", "Ibrutinib", "Everolimus", "Selumetinib")
countTab <- plotTab %>%
left_join(types, by = "uniqueBarcode") %>%
mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("healthy T-cell"), "Healthy T-Cells", celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("NK-cell"), "NK-Cells", celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("monocyte"), "Monocytes", celltype)) %>%
group_by(celltype, Treatment) %>%
dplyr::count() %>%
filter(celltype %in% cellSub, Treatment %in% testTreat) %>%
arrange(n)
minCells <- countTab[1, ]$n
minCells
## [1] 287
de.res.2020 <- lapply(testTreat, function(z) {
message(paste0("Fitting model for ", z))
lapply(cellSub, #mc.cores = ncores,
function(x) {
message(paste0("Celltype ", x))
sce.x <- sce_sub[, sce_sub$celltype == x]
sce.x <- sce.x[, sce.x$Treatment %in% c("DMSO", testTreat)]
message(ncol(sce.x), " cells")
if(opt$subsample == TRUE) {
message("Subsetting to ", minCells)
sce.x <- do.call(cbind, lapply(unique(sce.x$Treatment), function(y) {
sce.y <- sce.x[, sce.x$Treatment == y]
if(ncol(sce.y) == minCells) {sce.y} else {
set.seed(101)
sce.y <- sce.y[,sample(1:ncol(sce.y), minCells)]
sce.y
}
sce.y
}))
}
summed <- summarizeAssayByGroup(sce.x, ids = colData(sce.x)[,c("Diagnosis_simple", "patientID", "Treatment")],
statistics = "sum", assay.type = "counts") # get raw counts and sum them up
# summed <- applySCE(sce.x, aggregateAcrossCells, ids=colData(sce.x)[,c("Diagnosis_simple", "patientID", "Treatment")])
colData(summed)$Treatment <- factor(colData(summed)$Treatment, levels = c("DMSO", compDrug))
y <- DGEList(assay(summed), samples = colData(summed))
opt$subsample
if(opt$subsample == TRUE) {
discarded <- summed$ncells < 5
} else {
discarded <- summed$ncells < 30
}
y <- y[,!discarded]
message(paste0("Discarding ", sum(discarded), " samples"))
design <- model.matrix(~0 + factor(patientID) + Treatment, y$samples)
keep <- filterByExpr(y, design = design)
y <- y[keep,]
summary(keep)
message(paste0("Keeping ", sum(keep), " genes"))
y <- calcNormFactors(y)
# head(design)
y <- estimateDisp(y, design) # gene-wise dispersion (variance on top of poisson distribution)
#plotBCV(y)
fit <- glmQLFit(y, design, robust=TRUE) # QL way of fitting the model
#plotQLDisp(fit)
res <- glmQLFTest(fit, coef = paste0("Treatment", z))
# #res <- glmQLFTest(fit, coef = "TreatmentNutlin-3a")
summary(decideTests(res))
resTab <- res$table
resTab$gene <- rownames(resTab)
resTab$celltype <- x
resTab <- resTab #%>%
#mutate(p.adj = ifelse(p.adj < 1e-12, 1e-12, p.adj))
resTab$Treatment <- z
resTab %>% arrange(desc(logFC)) %>% mutate(p.adj = p.adjust(PValue, method = "BH"))
}) %>% bind_rows()
}) %>% bind_rows()
de.res.2020 <- de.res.2020 %>%
left_join(select(data.frame(rowData(sce.sc)), ID, Symbol), c("gene" = "ID"))
Show number of DEGs
################################################################################
# Show number of DEGs
################################################################################
plotMat <- de.res.2020 %>%
#filter(PValue < 0.05) %>%
filter(p.adj < 0.1) %>%
filter(logFC > 0.25 | logFC < -0.25) %>%
mutate(dir = ifelse(logFC < 0, "Down", "Up")) %>%
group_by(celltype, dir, Treatment) %>%
dplyr::rename(Direction = dir) %>%
dplyr::count()
plotMat <- plotMat %>%
mutate(n = ifelse(Direction == "Down", n * -1, n))
p <- plotMat %>%
#mutate(celltype = factor(celltype, levels = c("HCL", "MCL", "T-PLL & T-LGL", "T- & NK-cells"))) %>%
mutate(celltype = factor(celltype, levels = c("HCL", "MCL", "T-PLL", "Healthy T-Cells"))) %>%
ggplot(aes(x = celltype, y = n, fill = Direction)) +
geom_bar(colour = "black", stat = "identity") +
geom_hline(yintercept = 1) +
geom_text(aes(label = n), nudge_y = 50, data = plotMat[plotMat$Direction == "Up", ], size = 5) +
geom_text(aes(label = n), nudge_y = -50, data = plotMat[plotMat$Direction == "Down", ], size = 5) +
xlab("") + ylab("Number of Genes") + ggtitle("DEGs per Cell Type") +
lgd + scale_fill_manual(values = c("Up" = "#F44336", "Down" = "#1976D2")) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
text = element_text(size = 22.5),
panel.border = element_rect(linewidth = 1)) +
facet_wrap(. ~ Treatment, nrow = 1)
p

ggsave(plot = p, filename = paste0(opt$plot, "n_deg_per_celltype.png"),
height = 15, width = 50, units = "cm")
Show volcano plots
geneSelec <- path.info %>% filter(pathway %in% c("NFKB", "NFKB_Chemo", "NFKB_GF", "NFKB_TF", "NFKB_Apo", "TNF"#, "TLR"
)) %>%
select(gene) %>% unlist() %>% as.vector()
lapply(c("MCL", "HCL", "T-PLL", "Healthy T-Cells"), function(y) {
pList <- lapply(c("Birinapant", "Everolimus", "Ibrutinib")[1], function(x) {
volTab <- de.res.2020 %>% filter(Treatment == x, celltype == y) %>% #%>% dplyr::rename(Symbol = gene)
mutate(p.adj = ifelse(p.adj < 1e-12, 1e-12, p.adj)) %>% arrange(p.adj)
## Define limits
minLim <- -1 * max(abs(de.res.2020$logFC))
maxLim <- max(abs(de.res.2020$logFC))
thresh <- 0.25
volTab$diffCol <- NA
volTab[volTab$logFC > 0 & volTab$p.adj < 0.1 & volTab$logFC > thresh, "diffCol"] <- "sigUp"
volTab[volTab$logFC < 0 & volTab$p.adj < 0.1 & volTab$logFC < -thresh, "diffCol"] <- "sigDown"
volTab[volTab$p.adj >= 0.1, "diffCol"] <- "ns"
volTab[volTab$logFC < thresh & volTab$logFC > -(thresh), "diffCol"] <- "ns"
p <- volTab %>%
mutate(logFC = ifelse(logFC > maxLim, maxLim, logFC)) %>%
mutate(logFC = ifelse(logFC < minLim, minLim, logFC)) %>%
#filter(gene %in% geneSelec) %>%
filter(Treatment == x) %>%
ggplot(aes(x = logFC, y = -log10(p.adj), fill = diffCol)) +
geom_point(shape = 21, size = 2) + ggtitle(paste0(y, " - ", x)) +
xlab("Log2FC") + ylab("-Log10(p.adj)") +
xlim(-5, 5) + ylim(0, 12.75) +
geom_hline(yintercept = -log10(0.1), linetype = "dashed") +
geom_vline(xintercept = -thresh, linetype = "dashed") + geom_vline(xintercept = thresh, linetype = "dashed") +
geom_label_repel(data = filter(volTab, Symbol %in% c(geneSelec), Treatment %in% x, p.adj < 0.1,
logFC > thresh | logFC < -(thresh)), aes(label = Symbol),
nudge_y = 0.6, size = 5, max.overlaps = getOption("ggrepel.max.overlaps", default = 30),
segment.linetype = 2, force = 20, alpha = 0.8, fill = "white") +
scale_fill_manual(values = c("sigUp" = "#F44336", "sigDown" = "#1976D2", "ns" = "grey")) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "none",
text = element_text(size = 17.5),
axis.text = element_text(size = 15),
panel.background = element_rect(fill = "transparent",colour = NA),
plot.background = element_rect(fill = "transparent",colour = NA),
legend.background = element_rect(fill='transparent'),
strip.background = element_blank())
ggsave(plot = p, filename = paste0(opt$plot, "vol_", y, "_", x, ".png"),
height = 15, width = 15, unit = "cm")
p
})
grid.arrange(grobs = pList)
})
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).



## [[1]]
## TableGrob (1 x 1) "arrange": 1 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
##
## [[2]]
## TableGrob (1 x 1) "arrange": 1 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
##
## [[3]]
## TableGrob (1 x 1) "arrange": 1 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
##
## [[4]]
## TableGrob (1 x 1) "arrange": 1 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
Load per patient
patSub <- names(sce.pat)
sce.pat <- mclapply(patSub, mc.cores = ncores, function(x) {
sce <- sce.pat[[x]]
set.seed(100)
sce <- runUMAP(sce, n_neighbors = 15, min_dist = 0.35, name = "UMAP", BPPARAM = mcparam)
})
names(sce.pat) <- patSub
patSub <- c("H371", "H431", "H279")
lapply(patSub, function(x) {
sce <- sce.pat[[x]]
set.seed(100)
sce <- sce[,sample(1:ncol(sce), ncol(sce))]
lapply(c("Birinapant"), function(y) {
plotTab <- ggcells(sce)[[1]]
if(x == "H371") {tlt <- "T-PLL1"
} else if(x == "H431") {tlt <- "T-PLL2"
} else if(x == "H279") {tlt <- "T-PLL3"} else {
tlt <- x
}
p <- plotTab %>%
filter(Treatment %in% c("DMSO", y)) %>%
ggplot(aes(x = UMAP.1, y = UMAP.2, fill = Treatment)) +
geom_point(shape = 21, size = 3) +
ggtitle(tlt) +
lgd +
theme_void() +
theme(axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
plot.title = element_text(size = 20, hjust = 0.5)) +
scale_fill_manual(values = c("DMSO" = "grey70", "Birinapant" = "#AD1457", "Nutlin-3a" = "#FFA000",
"Ibrutinib" = "#283593", "Everolimus" = "#43A047",
"Selumetinib" = "#F8BBD0"))
ggsave(plot = p, filename = paste0(opt$plot, x, "_perpat_", y, "_umap.png"),
height = 12.5, width = 12.5, units = "cm")
p
})
})
## [[1]]
## [[1]][[1]]

##
##
## [[2]]
## [[2]][[1]]

##
##
## [[3]]
## [[3]][[1]]

lapply(c("NFKBIA"), function(y) {
lapply(c("Birinapant"), function(x) {
lapply(patSub, function(z) {
sce <- sce.pat[[z]]
rownames(sce) <- rowData(sce)$Symbol
plotTab <- ggcells(sce, features = y)[[1]]
plotTab <- plotTab %>%
filter(Treatment %in% c("DMSO", x)) %>%
mutate(Treatment = factor(Treatment, levels = c("DMSO", x))) %>%
pivot_longer(cols = y, names_to = "gene", values_to = "expr")
p <- plotTab %>%
ggplot(aes(x = UMAP.1, y = UMAP.2, fill = expr)) +
geom_point(shape = 21, size = 3) +
lgd +
theme_void() +
theme(axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
strip.text = element_text(size = 20, hjust = 0.5)) +
scale_fill_gradient(high = "red", low = "grey80",
limits=c(0, quantile(plotTab$expr, 0.99)), oob=squish) +
facet_wrap(. ~ Treatment)
ggsave(plot = p, filename = paste0(opt$plot, "perpat/", x, "_perpat_", y, "_", z, "_umap.png"),
height = 12.5, width = 27.5, units = "cm")
p
})
})
})
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(y)
##
## # Now:
## data %>% select(all_of(y))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## [[1]]
## [[1]][[1]]
## [[1]][[1]][[1]]

##
## [[1]][[1]][[2]]

##
## [[1]][[1]][[3]]

Perform differential gene expression testing - scRNA-Seq - without
subsampling
opt$subsample <- FALSE
compDrug <- c("Nutlin-3a", "Birinapant", "Ibrutinib", "Everolimus", "Selumetinib")
sce_sub <- sce.sc
plotTab <- ggcells(sce_sub)[[1]]
colData(sce_sub) <- colData(sce_sub) %>%
data.frame() %>%
left_join(types, by = "uniqueBarcode") %>%
mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("healthy T-cell"), "T-Cells", celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("NK-cell"), "NK-Cells", celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("monocyte"), "Monocytes", celltype)) %>%
mutate(celltype = ifelse(patientID%in% c("H501") & celltype %in% c("MCL", "HCL", "T-PLL"), "dead cell", celltype)) %>%
mutate(celltype = ifelse(patientID%in% c("H431", "H371", "H279") & celltype %in% c("MCL", "HCL", "T-LGL"), "dead cell", celltype)) %>%
mutate(celltype = ifelse(patientID%in% c("H526", "H525") & celltype %in% c("MCL", "T-PLL", "T-LGL"), "dead cell", celltype)) %>%
mutate(celltype = ifelse(patientID%in% c("H496", "H432", "P1029") & celltype %in% c("HCL", "T-PLL", "T-LGL"), "dead cell", celltype)) %>%
DataFrame()
if(opt$subsample == TRUE) {
cellSub <- c("HCL", "MCL", "T-PLL", "T-Cells")
} else {
cellSub <- c("HCL", "MCL", "T-PLL")
}
testTreat <- c("Birinapant", "Nutlin-3a", "Ibrutinib", "Everolimus", "Selumetinib")
countTab <- plotTab %>%
left_join(types, by = "uniqueBarcode") %>%
mutate(celltype = ifelse(celltype %in% c("malignant B-cell", "malignant T-cell"), Diagnosis_simple, celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("healthy T-cell"), "T-Cells", celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("NK-cell"), "NK-Cells", celltype)) %>%
mutate(celltype = ifelse(celltype %in% c("monocyte"), "Monocytes", celltype)) %>%
group_by(celltype, Treatment) %>%
dplyr::count() %>%
filter(celltype %in% cellSub, Treatment %in% testTreat) %>%
arrange(n)
minCells <- countTab[1, ]$n
minCells
## [1] 773
de.res.2020 <- lapply(testTreat, function(z) {
message(paste0("Fitting model for ", z))
lapply(cellSub, #mc.cores = ncores,
function(x) {
message(paste0("Celltype ", x))
sce.x <- sce_sub[, sce_sub$celltype == x]
sce.x <- sce.x[, sce.x$Treatment %in% c("DMSO", testTreat)]
message(ncol(sce.x), " cells")
if(opt$subsample == TRUE) {
message("Subsetting to ", minCells)
sce.x <- do.call(cbind, lapply(unique(sce.x$Treatment), function(y) {
sce.y <- sce.x[, sce.x$Treatment == y]
if(ncol(sce.y) == minCells) {sce.y} else {
set.seed(99)
sce.y <- sce.y[,sample(1:ncol(sce.y), minCells)]
sce.y
}
sce.y
}))
}
summed <- summarizeAssayByGroup(sce.x, ids = colData(sce.x)[,c("Diagnosis_simple", "patientID", "Treatment")],
statistics = "sum", assay.type = "counts") # get raw counts and sum them up
# summed <- applySCE(sce.x, aggregateAcrossCells, ids=colData(sce.x)[,c("Diagnosis_simple", "patientID", "Treatment")])
colData(summed)$Treatment <- factor(colData(summed)$Treatment, levels = c("DMSO", compDrug))
y <- DGEList(assay(summed), samples = colData(summed))
opt$subsample
if(opt$subsample == TRUE) {
discarded <- summed$ncells < 5
} else {
discarded <- summed$ncells < 30
}
y <- y[,!discarded]
message(paste0("Discarding ", sum(discarded), " samples"))
design <- model.matrix(~0 + factor(patientID) + Treatment, y$samples) # set coefficient (name or matrix column) - fig. 7 f1000
keep <- filterByExpr(y, design = design)
y <- y[keep,]
summary(keep)
message(paste0("Keeping ", sum(keep), " genes"))
y <- calcNormFactors(y)
# head(design)
y <- estimateDisp(y, design) # gene-wise dispersion (variance on top of poisson distribution)
#plotBCV(y)
fit <- glmQLFit(y, design, robust=TRUE) # QL way of fitting the model
#plotQLDisp(fit)
res <- glmQLFTest(fit, coef = paste0("Treatment", z))
# #res <- glmQLFTest(fit, coef = "TreatmentNutlin-3a")
summary(decideTests(res))
resTab <- res$table
resTab$gene <- rownames(resTab)
resTab$celltype <- x
resTab <- resTab #%>%
#mutate(p.adj = ifelse(p.adj < 1e-12, 1e-12, p.adj))
resTab$Treatment <- z
resTab %>% arrange(desc(logFC)) %>% mutate(p.adj = p.adjust(PValue, method = "BH"))
}) %>% bind_rows()
}) %>% bind_rows()
de.res.2020 <- de.res.2020 %>%
left_join(select(data.frame(rowData(sce.sc)), ID, Symbol), c("gene" = "ID"))
Heatmap with logFC per patient
opt$subsample <- FALSE
diffMat.list <- lapply(c("Birinapant"), function(z) {
message(paste0("Fitting model for ", z))
sce.x <- sce_sub[, sce_sub$celltype %in% c("T-PLL", "T-LGL", "HCL", "MCL")]
colData(sce.x) <- colData(sce.x) %>%
data.frame() %>%
mutate(celltype = ifelse(celltype %in% c("T-PLL", "T-LGL", "HCL", "MCL"), patientID, celltype)) %>%
DataFrame()
sce.x <- sce.x[, sce.x$Treatment %in% c("DMSO", z)]
summed <- summarizeAssayByGroup(sce.x, ids = colData(sce.x)[,c("celltype", "Treatment")],
statistics = "sum", assay.type = "counts") # get raw counts and sum them up
colData(summed)$Treatment <- factor(colData(summed)$Treatment, levels = c("DMSO", compDrug))
y <- DGEList(assay(summed), samples = colData(summed))
opt$subsample
if(opt$subsample == TRUE) {
discarded <- summed$ncells < 10
} else {
discarded <- summed$ncells < 30
}
y <- y[,!discarded]
message(paste0("Discarding ", sum(discarded), " samples"))
design <- model.matrix(~0 + factor(celltype) + Treatment, y$samples) # set coefficient (name or matrix column) - fig. 7 f1000
keep <- filterByExpr(y, design = design)
y <- y[keep,]
summary(keep)
message(paste0("Keeping ", sum(keep), " genes"))
y <- calcNormFactors(y)
count.mat <- cpm(y, log = TRUE)
colnames(count.mat) <- paste0(y$samples$celltype, "_", y$samples$Treatment)
count.mat
## Compute the logFC per patient
diffMat <- lapply(unique(sce.x$celltype), function(x) {
message(x)
## Subset to the samples of this patient
matSub <- count.mat[, str_detect(string = colnames(count.mat), pattern = x)]
## Save gene names
matSub <- matSub %>% data.frame() %>% rownames_to_column("gene")
matSub
## Compute logFC
if(ncol(matSub) == 3) {
colnames(matSub)[str_detect(string = colnames(matSub), pattern = "DMSO")] <- "DMSO"
colnames(matSub)[str_detect(string = colnames(matSub), pattern = gsub(x = x, pattern = "-", replacement = "."))] <- "Treatment"
matSub <- matSub %>%
mutate(logFC = Treatment - DMSO) %>%
select(gene, logFC)
} else {
matSub <- data.frame(gene = matSub$gene, logFC = NA)
}
colnames(matSub)[str_detect(string = colnames(matSub), pattern = "logFC")] <- x
matSub %>% column_to_rownames("gene")
}) %>% bind_cols()
})
## Fitting model for Birinapant
## Discarding 0 samples
## Keeping 12788 genes
## H431
## H526
## H496
## H371
## H501
## H279
## H525
## H432
## H452
names(diffMat.list) <- c("Birinapant")
geneSelec <- path.info %>% filter(pathway %in% c("NFKB", "NFKB_Chemo", "NFKB_GF", "NFKB_TF", "NFKB_Apo", "TNF"#, "TLR"
)) %>%
select(gene) %>% unlist() %>% as.vector()
patLabel <- data.frame(patientID = c("H371", "H431", "H279", "H501", "H452", "H432", "H496", "H525", "H526", "T-Cells"),
tumorType = c("T-PLL1", "T-PLL2", "T-PLL3", "T-LGL1", "MCL1", "MCL2", "MCL3", "HCL1", "HCL2", "T-Cells"))
pHeat <- lapply(c("Birinapant")[[1]], function(x) {
plotMat <- diffMat.list[[x]]
if(x == "Nutlin-3a") {
geneSelec <- path.info %>% filter(pathway %in% c("TP53up")) %>% select(gene) %>% unlist() %>% as.vector()
}
plotMat <- plotMat %>% rownames_to_column("ID") %>%
left_join(select(data.frame(rowData(sce.sc)), ID, Symbol), c("ID")) %>%
dplyr::select(-ID) %>% filter(Symbol %in% geneSelec) %>% column_to_rownames("Symbol")
geneSelec <- intersect(rownames(plotMat), geneSelec)
geneSig <- de.res.2020 %>% filter(p.adj < 0.1, Treatment %in% x, celltype %in% c("T-PLL", "MCL", "HCL")) %>%
filter(logFC > 0.25 | logFC < -0.25) %>%
pull(Symbol) %>% unique()
geneSelec <- intersect(geneSig, geneSelec)
plotMat <- t(plotMat[geneSelec, ])
rownames(plotMat) <- patLabel[match(rownames(plotMat), patLabel$patientID), ]$tumorType
pHeat <- Heatmap(t(plotMat), clustering_method_rows = "ward.D2",
clustering_method_columns = "ward.D2",
show_row_names = TRUE,
cluster_rows = TRUE, border = TRUE, #column_names_gp = gpar(fontsize = 8),
column_title = paste0(x, " - Log2FC Malignant Cells"),
rect_gp = gpar(col = "black", lwd = 0.1),
cluster_columns = TRUE, #row_names_gp = gpar(fontsize = 7),
col=colorRamp2(c(-2, 0, 2), colors = c("#1976D2", "white", "#F44336")), name = "Log2FC")
png(file=paste0(opt$plot, x, "_logFC_heatmap.png"),
height = 25, width = 12.5, res = 600, units = "cm", bg = "transparent")
draw(pHeat, background = "transparent")
dev.off()
pHeat
})
pHeat
## [[1]]

Output session info
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel grid stats4 stats graphics grDevices datasets
## [8] utils methods base
##
## other attached packages:
## [1] scales_1.3.0 numbat_1.4.0
## [3] Matrix_1.6-1.1 airr_1.5.0
## [5] ggsci_3.2.0 BiocParallel_1.36.0
## [7] ggnet_0.1.0 network_1.18.2
## [9] tftargets_1.3 ggrepel_0.9.5
## [11] readxl_1.4.3 edgeR_4.0.16
## [13] limma_3.58.1 circlize_0.4.16
## [15] ComplexHeatmap_2.18.0 gridExtra_2.3
## [17] gprofiler2_0.2.3 scater_1.30.1
## [19] scran_1.30.2 scuttle_1.12.0
## [21] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
## [23] Biobase_2.62.0 GenomicRanges_1.54.1
## [25] GenomeInfoDb_1.38.8 IRanges_2.36.0
## [27] S4Vectors_0.40.2 BiocGenerics_0.48.1
## [29] MatrixGenerics_1.14.0 matrixStats_1.3.0
## [31] lubridate_1.9.3 forcats_1.0.0
## [33] stringr_1.5.1 dplyr_1.1.4
## [35] purrr_1.0.2 readr_2.1.5
## [37] tidyr_1.3.1 tibble_3.2.1
## [39] ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.3.2
## [3] bitops_1.0-7 ggplotify_0.1.2
## [5] cellranger_1.1.0 polyclip_1.10-7
## [7] lifecycle_1.0.4 doParallel_1.0.17
## [9] lattice_0.21-9 MASS_7.3-60
## [11] magrittr_2.0.3 plotly_4.10.4
## [13] sass_0.4.9 rmarkdown_2.27
## [15] jquerylib_0.1.4 yaml_2.3.9
## [17] metapod_1.10.1 RColorBrewer_1.1-3
## [19] abind_1.4-5 zlibbioc_1.48.2
## [21] quadprog_1.5-8 hahmmr_1.0.0
## [23] ggraph_2.2.1 RCurl_1.98-1.14
## [25] yulab.utils_0.1.5 tweenr_2.0.3
## [27] GenomeInfoDbData_1.2.11 irlba_2.3.5.1
## [29] tidytree_0.4.6 dqrng_0.4.1
## [31] DelayedMatrixStats_1.24.0 codetools_0.2-19
## [33] DelayedArray_0.28.0 ggforce_0.4.2
## [35] tidyselect_1.2.1 shape_1.4.6.1
## [37] aplot_0.2.3 farver_2.1.2
## [39] ScaledMatrix_1.10.0 viridis_0.6.5
## [41] jsonlite_1.8.8 GetoptLong_1.0.5
## [43] BiocNeighbors_1.20.2 tidygraph_1.3.1
## [45] iterators_1.0.14 systemfonts_1.1.0
## [47] foreach_1.5.2 tools_4.3.2
## [49] ragg_1.3.2 treeio_1.29.0.002
## [51] Rcpp_1.0.12 glue_1.7.0
## [53] SparseArray_1.2.4 xfun_0.45
## [55] withr_3.0.0 fastmap_1.2.0
## [57] bluster_1.12.0 fansi_1.0.6
## [59] digest_0.6.36 rsvd_1.0.5
## [61] parallelDist_0.2.6 timechange_0.3.0
## [63] R6_2.5.1 gridGraphics_0.5-1
## [65] textshaping_0.4.0 colorspace_2.1-0
## [67] Cairo_1.6-2 RhpcBLASctl_0.23-42
## [69] utf8_1.2.4 generics_0.1.3
## [71] renv_1.0.7 data.table_1.15.4
## [73] graphlayouts_1.1.1 httr_1.4.7
## [75] htmlwidgets_1.6.4 S4Arrays_1.2.1
## [77] uwot_0.2.2 pkgconfig_2.0.3
## [79] gtable_0.3.5 XVector_0.42.0
## [81] htmltools_0.5.8.1 clue_0.3-65
## [83] png_0.1-8 ggfun_0.1.5
## [85] knitr_1.48 rstudioapi_0.16.0
## [87] tzdb_0.4.0 rjson_0.2.21
## [89] coda_0.19-4.1 statnet.common_4.9.0
## [91] nlme_3.1-163 cachem_1.1.0
## [93] GlobalOptions_0.1.2 vipor_0.4.7
## [95] pillar_1.9.0 logger_0.3.0
## [97] vctrs_0.6.5 BiocSingular_1.18.0
## [99] beachmat_2.18.1 cluster_2.1.4
## [101] beeswarm_0.4.0 evaluate_0.24.0
## [103] cli_3.6.3 locfit_1.5-9.10
## [105] compiler_4.3.2 rlang_1.1.4
## [107] crayon_1.5.3 labeling_0.4.3
## [109] fs_1.6.4 ggbeeswarm_0.7.2
## [111] stringi_1.8.4 viridisLite_0.4.2
## [113] munsell_0.5.1 lazyeval_0.2.2
## [115] hms_1.1.3 patchwork_1.2.0
## [117] sparseMatrixStats_1.14.0 statmod_1.5.0
## [119] highr_0.11 igraph_2.0.3
## [121] memoise_2.0.1 scistreer_1.2.0
## [123] RcppParallel_5.1.8 bslib_0.7.0
## [125] phangorn_2.11.1 ggtree_3.10.1
## [127] fastmatch_1.1-4 ape_5.8